Loading libraries

library(data.table)
library(kableExtra)
library(forcats)
library(caret)
library(C50)
library(mgsub)
library(DESeq2)
library(pheatmap)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(limma)
library(edgeR)
library(enrichR)
library(gridExtra)
library(stringr)
library(ggVennDiagram)
library(dplyr)
library(ensembldb)
library(EnsDb.Hsapiens.v86)
library(factoextra)
library(rafalib)
library(plotly)
library(tibble)
library(devtools)
library(clusterProfiler)
library(enrichplot)
library(pathview)
library(ggprism)

Load and wrangle raw counts

# Load 1st batch raw counts
counts_raw_1 <- data.table::fread("../data/proces-data_core-facility/subreadCounts_hg38ens_minus_frag.txt", 
                                  header = TRUE, sep = "\t") %>% 
  as.data.frame()

# Load 2nd batch raw counts
counts_raw_2 <- data.table::fread("../data/proces-data_core-facility/3rd_subreadCounts_hg38ens_minus_frag.txt", 
                                  header = TRUE, sep = "\t") %>% 
  as.data.frame()

# Join raw count data frames
counts_raw <- left_join(counts_raw_1, counts_raw_2, by = "Geneid")

# Rename genes based on Emsembl annotation
edb <- EnsDb.Hsapiens.v86
edb_genes <- genes(edb) %>% as.data.frame()

# Remove unnecessary columns and convert 'Geneid' to gene names
counts_raw <- counts_raw %>%
  dplyr::select(-contains(c("Chr", "Start", "End", "Strand", "Length"))) %>%
  dplyr::mutate(Geneid = plyr::mapvalues(
    x = counts_raw$Geneid,
    from = edb_genes$gene_id,
    to = edb_genes$symbol,
    warn_missing = FALSE
  ))

# Aggregate and sum up genes with same gene symbol (basically non-coding RNAs)
counts_raw <- aggregate(counts_raw[, -1], 
                        list(Geneid = counts_raw[, 1]), 
                        FUN = sum)

# Change row names to gene names
rownames(counts_raw) <- counts_raw$Geneid
counts_raw <- dplyr::select(counts_raw, -c(Geneid))

Load metadata and match with raw counts

# Load sample metadata
sampleTable <- read.table("../data/metadata/sample_metadata.csv", 
                          sep = ",", header = TRUE) %>%
  dplyr::select(-c(age, sex))

# Load patient metadata
patient_metadata <- read.table("../data/metadata/patient_metadata.csv", 
                               sep = ",", header = TRUE)

# Join sample and patient metadata
sampleTable <- sampleTable %>% left_join(patient_metadata, by = "subject_ID")

# Change visits to dose and time point...
sampleTable <- sampleTable %>%
  mutate(
    dose = as.factor(plyr::mapvalues(visit,
      from = c(1, 2, 4, 5, 10, 11, 12),
      to   = c("0_dose1", "24_dose1", "0_dose2", "24_dose2", "0_dose3", "24_dose3", "48_dose3")
    )),
    group = as.factor(group)
  ) %>%
  # ...(sex) 0 and 1 to male and female...
  mutate(
    sex = as.factor(plyr::mapvalues(sex,
      from = c(0, 1),
      to   = c("male", "female")
    )),
    group = as.factor(sex)
  ) %>%
  # ...(prior covid) 0 and 1 to neg and pos
  mutate(
    prior_covid19 = as.factor(plyr::mapvalues(prior_covid19,
      from = c(0, 1),
      to   = c("neg", "pos")
    )),
    group = as.factor(prior_covid19)
  )

# Remove 24dose_2 from subject 21 (missing 0dose_2)
sampleTable <- subset(sampleTable, sample_ID != "P22761_1037_S37")

# Remove one sample from subject 7 (sample replicate)
sampleTable <- subset(sampleTable, sample_ID != "P26208_1060_S62")

# Remove corresponding samples from count table
counts_raw <- subset(counts_raw, select = -c(P26208_1060_S62, P22761_1037_S37))

# Match count data with sample table
colnames(counts_raw) <- sampleTable$sample_ID
counts_raw <- counts_raw[, pmatch(sampleTable$sample_ID, colnames(counts_raw))]
all(rownames(sampleTable$sample_ID) == colnames(counts_raw))

Basic Quality Control

Inspect raw data

Plotting raw counts for first visual inspection. The boxplot shows median values of zero across all samples. In the histogram there is a huge peak of zero counts. Both indicate that the data set would benefit from a low count filtering.

# Visualize distribution of raw counts with boxplot and density plot
boxplot(log2(as.matrix(counts_raw) + 1),
  ylab = expression("Log"[2] ~ "Read counts"),
  las = 2,
  main = "Raw data"
)

hist(log2(as.matrix(counts_raw) + 1),
  ylab = "",
  las = 2,
  main = "Raw data"
)

## quartz_off_screen 
##                 2

Filtering

Plot number of genes detected across samples. All samples are more or less close to average so we don’t need to discard any samples.

barplot(colSums(counts_raw > 3),
  ylab = "Number of detected genes",
  las = 2
) +
  abline(h = median(colSums(counts_raw > 3)))

## quartz_off_screen 
##                 2

Removing reads with the log2 of the counts per million (cpm) lower than 1 (= 45896 genes). Plot genes detection rate across samples, comparing raw and filtered counts.

meanLog2CPM <- rowMeans(log2(cpm(counts_raw) + 1))
counts_filtered <- counts_raw[meanLog2CPM > 1, ]

hist(rowSums(counts_raw > 3), main = title("Raw Counts"))

hist(rowSums(counts_filtered > 3), main = title("Filtered Counts"))

## quartz_off_screen 
##                 2

Plot distribution of the filtered counts

boxplot(log2(as.matrix(counts_filtered) + 1),
  ylab = expression("Log"[2] ~ "Read counts"),
  las = 2,
  main = "Filtered data"
)

hist(log2(as.matrix(counts_filtered) + 1),
  ylab = "",
  las = 2,
  main = "Filtered data"
)

## quartz_off_screen 
##                 2

DESeq object creation

Generating three separate DESeq objects with different design parameters to allow for more comparisons. Controlling for age and sex.

Normalization and Quality Control

Normalize with variance stabilizing transformation Plot distribution of data after normalization

# Normalize with variance stabilizing transformation for later PCA and heatmap
counts_vst_normalized <- vst(dds1, blind = TRUE)

vst_matrix <- assay(counts_vst_normalized)
hist(vst_matrix)

boxplot(vst_matrix, 
        ylab = expression("Log"[2] ~ "Read counts"), 
        las = 2, 
        main = "VST")

## quartz_off_screen 
##                 2

Heatmap

# Sample heatmap
sampleDist <- cor(vst_matrix, method = "spearman")

Metadata <- data.frame(sampleTable$group, sampleTable$dose)
names(Metadata) <- c("Group", "Dose")
rownames(Metadata) <- sampleTable$sample_ID

# Plot heatmap
colors <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(255)

Heatmap <- pheatmap(sampleDist,
  color = colors,
  clustering_distance_rows = as.dist(1 - sampleDist),
  clustering_distance_cols = as.dist(1 - sampleDist),
  show_rownames = FALSE,
  show_colnames = FALSE,
  clustering_method = "ward.D2",
  annotation_col = Metadata
)

print(Heatmap)

Principal Component Analysis (PCA)

Dimensionality reduction for evaluating outliers and global sample clusters for both quality control but also for primary exploratory analysis.

# PCA plot
pcaRes <- prcomp(t(assay(counts_vst_normalized)))
varExp <- round(summary(pcaRes)[[1]], digits = 2)

pcaDF <- data.frame(
  PC1 = pcaRes$x[, 1],
  PC2 = pcaRes$x[, 2],
  Group = sampleTable$group,
  Sample = sampleTable$sample_ID,
  Dose = sampleTable$dose
)

pcaPlot <- ggplot(pcaDF, mapping = aes(x = PC1, y = PC2, color = Group, label = Sample)) +
  geom_point(aes(shape = Group), size = 3) +
  labs(x = paste0("PC1 (", varExp[1], " %)"), 
       y = paste0("PC2 (", varExp[2], " %)")) +
  theme(axis.text = element_text(size = 12), 
        legend.text = element_text(size = 10)) +
  scale_color_manual(values = brewer.pal(3, "Accent")) +
  cowplot::theme_cowplot()

print(pcaPlot)

# Scree plot, showing contribution of principal components in descending order
pca.var <- pcaRes$sdev^2
pca.var.per <- round(pca.var / sum(pca.var) * 100, 1)

barplot(pca.var.per, main = "Scree Plot", 
        xlab = "Principal Component", 
        ylab = "Percent Variation")

# Explore which variables contribute the most for each PC
var <- get_pca_var(pcaRes)
var$contrib %>%
  as.data.frame() %>%
  arrange(desc(Dim.1)) %>%
  head() %>%
  kableExtra::kable() %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7 Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14 Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21 Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28 Dim.29 Dim.30 Dim.31 Dim.32 Dim.33 Dim.34 Dim.35 Dim.36 Dim.37 Dim.38 Dim.39 Dim.40 Dim.41 Dim.42 Dim.43 Dim.44 Dim.45 Dim.46 Dim.47 Dim.48 Dim.49 Dim.50 Dim.51 Dim.52 Dim.53 Dim.54 Dim.55 Dim.56 Dim.57 Dim.58 Dim.59 Dim.60 Dim.61 Dim.62 Dim.63 Dim.64 Dim.65 Dim.66 Dim.67 Dim.68 Dim.69 Dim.70 Dim.71 Dim.72 Dim.73 Dim.74 Dim.75 Dim.76 Dim.77 Dim.78 Dim.79 Dim.80 Dim.81 Dim.82 Dim.83 Dim.84 Dim.85 Dim.86 Dim.87 Dim.88 Dim.89 Dim.90 Dim.91 Dim.92 Dim.93 Dim.94 Dim.95 Dim.96 Dim.97
ANKRD22 0.4217933 0.2505569 0.2003826 0.0422998 0.0189043 0.0404082 0.0173591 0.0885864 0.0003649 0.0038392 0.0000020 0.0002890 0.0023223 0.0542218 0.0013778 0.0018450 0.0019280 0.0087026 0.0066551 0.0382567 0.0145244 0.0219687 0.0101429 0.0221107 0.0863425 0.0078928 0.0214618 0.1486442 0.0199885 0.0897531 0.0125324 0.0015520 0.0626238 0.0193311 0.0710494 0.0008723 0.0314203 0.0451216 0.0000439 0.0204614 0.0008208 0.0054950 0.2025240 0.0226174 0.0038141 0.0093374 0.0099374 0.0152016 0.0158981 0.0588435 0.0340965 0.0138556 0.0078743 0.0109716 0.0108596 0.0005966 0.0172369 0.0320728 0.0030809 0.0301364 0.0068172 0.0049323 0.0000600 0.0262762 0.0253411 0.0000429 0.0010599 0.0035432 0.0389973 0.0097733 0.0028502 0.0350858 0.0020317 0.0240513 0.0009528 0.0290653 0.0317125 0.0187248 0.0955228 0.0029567 0.0103937 0.0011457 0.0024625 0.0074009 0.0007027 0.0188629 0.0003051 0.0218210 0.0060650 0.1029814 0.0071412 0.0013086 0.0065278 0.0062573 0.0615636 0.0150210 0.0066786
CD274 0.3345196 0.1009233 0.0602499 0.0037822 0.0093243 0.0151708 0.0011218 0.0422326 0.0000585 0.0373816 0.0000001 0.0160423 0.0141631 0.0072173 0.0000674 0.0004801 0.0019738 0.0128860 0.0124766 0.0189533 0.0652517 0.0091969 0.0025102 0.0079132 0.0305417 0.0099214 0.0367804 0.0372367 0.0012176 0.0211233 0.0601851 0.0061029 0.0120333 0.0398387 0.0051269 0.0080009 0.0021416 0.0524895 0.0432059 0.0000008 0.0279120 0.0063363 0.0797076 0.0058193 0.0001348 0.0302692 0.0000424 0.0196492 0.0014471 0.0088813 0.0019505 0.0025966 0.0001692 0.0000287 0.0093283 0.0003272 0.0002965 0.0003802 0.0017349 0.0027832 0.0001454 0.0176013 0.0045840 0.0000061 0.0150804 0.0017304 0.0071859 0.0000229 0.0000744 0.0027884 0.0001327 0.0015583 0.0191591 0.0148019 0.0016805 0.0012315 0.0104034 0.0000007 0.0156711 0.0223596 0.0046108 0.0061336 0.0077069 0.0067730 0.0013722 0.0200496 0.0038179 0.0057087 0.0020293 0.0027199 0.0088124 0.0104140 0.0005049 0.0245188 0.0175333 0.0149133 0.0167214
IDO1 0.3135706 0.2696357 0.3278656 0.0044270 0.0332623 0.0762194 0.0009360 0.0512213 0.0001969 0.2541765 0.0015224 0.0093326 0.0372198 0.0496579 0.0084350 0.1803199 0.0703625 0.0303141 0.1736691 0.0749319 0.0031014 0.0039749 0.1507306 0.0402447 0.0279916 0.0711381 0.0436451 0.0090875 0.0022616 0.0130289 0.0550175 0.0000770 0.0091099 0.5441411 0.0000183 0.0094116 0.0873722 0.0216101 0.0026831 0.0035489 0.0012527 0.0588067 0.0046692 0.1995761 0.1781897 0.0072151 0.0012931 0.0569392 0.0020319 0.1493805 0.0122808 0.0108091 0.0911839 0.0286648 0.1431080 0.0309583 0.0588596 0.0443323 0.0376232 0.0505128 0.0361033 0.0000280 0.0582642 0.0168388 0.0571266 0.0011228 0.0338663 0.0008262 0.0003461 0.0034229 0.0195638 0.0015507 0.0000253 0.0170306 0.1114603 0.0129282 0.0014495 0.0739068 0.0745657 0.0819272 0.0379532 0.0016498 0.0050849 0.0402683 0.0507979 0.0839271 0.2357384 0.0327101 0.0978901 0.0016759 0.0010899 0.0003669 0.0131054 0.0250035 0.0098543 0.1418551 0.0285978
RSAD2 0.3115895 0.1317461 0.4946217 0.0172028 0.0163240 0.0323426 0.0835563 0.0381113 0.0134674 0.3944521 0.0335131 0.0317588 0.0685602 0.0137963 0.0702797 0.0002819 0.2669623 0.0961589 0.2121231 0.0199449 0.2080207 0.0000143 0.0726297 0.0004310 0.0218007 0.0516763 0.3343413 0.2187911 0.0168363 0.0245128 0.0476409 0.1066122 0.0436766 0.0636854 0.0476390 0.1317618 0.0711654 0.2030925 0.1134187 0.1218400 0.1969739 0.0002950 0.0454342 0.0011287 0.0012375 0.0514790 0.0329220 0.0089540 0.0048614 0.0000176 0.0038734 0.0052603 0.0000034 0.0102345 0.0402349 0.0022837 0.0069503 0.0130203 0.0796483 0.0649925 0.0241483 0.0000235 0.0029046 0.0421330 0.0330491 0.0065524 0.0047552 0.0735554 0.0107768 0.0190111 0.0235893 0.0010582 0.0125278 0.0263425 0.1101947 0.0060331 0.0054090 0.0038610 0.0000894 0.0058760 0.0073713 0.1097668 0.0000064 0.0388821 0.0003012 0.0034997 0.0913253 0.0000169 0.0565855 0.0309798 0.0001454 0.0156383 0.0335462 0.0018633 0.0116830 0.0031689 0.0164516
GBP5 0.2940112 0.1528092 0.2111231 0.0012699 0.0135266 0.0050102 0.0026284 0.0364923 0.0291111 0.0369568 0.0000656 0.0319123 0.0254056 0.0014938 0.0167442 0.0035618 0.0075121 0.0065602 0.0185175 0.0192083 0.0170086 0.0784249 0.0012418 0.0436381 0.0087262 0.0001130 0.0002480 0.0714088 0.0018897 0.0030773 0.1118770 0.0134346 0.0474526 0.0158384 0.0058500 0.0165717 0.0573069 0.0771274 0.0039274 0.0158855 0.0025136 0.0003443 0.0474877 0.0040286 0.0093260 0.0032051 0.0161607 0.0013155 0.0039888 0.0020942 0.1546967 0.0616063 0.0565971 0.0003420 0.0009767 0.0006276 0.0001408 0.0110882 0.0067066 0.0275997 0.0000000 0.0020051 0.0029772 0.0022883 0.0001281 0.0006980 0.0009212 0.0007795 0.0122504 0.0099793 0.0156720 0.0049124 0.0017352 0.0050305 0.0000348 0.0000447 0.0064097 0.0005507 0.0020133 0.0000218 0.0000959 0.0023225 0.0028271 0.0039007 0.0043974 0.0080632 0.0002832 0.0029548 0.0003998 0.0047045 0.0000665 0.0000392 0.0049562 0.0014232 0.0006277 0.0000331 0.0158614
FCGR1A 0.2823231 0.2357730 0.0728502 0.0183126 0.0099805 0.0072591 0.0067520 0.0678737 0.0460080 0.0687025 0.0068106 0.0597863 0.0081804 0.0412447 0.0410905 0.0079678 0.0030816 0.0005568 0.1678065 0.0346272 0.0428603 0.0202386 0.0197273 0.0024663 0.0286904 0.0012267 0.0034475 0.1346598 0.1084618 0.0143672 0.1182885 0.0018088 0.0289740 0.1402816 0.0443421 0.0919229 0.0254184 0.0967653 0.0218757 0.0107048 0.0017156 0.0004481 0.0320861 0.0013094 0.0465137 0.0659293 0.0164754 0.0482426 0.0097662 0.0011163 0.1404725 0.0022083 0.0500422 0.0000832 0.0035067 0.0063863 0.0076013 0.0158710 0.0013184 0.0107763 0.0113843 0.0008443 0.0056049 0.0260913 0.0446636 0.0039327 0.0000021 0.0191528 0.0000000 0.0408724 0.0379039 0.0000275 0.0046322 0.0000443 0.0000648 0.0411028 0.0308248 0.0024982 0.0027832 0.0136494 0.0003288 0.0003578 0.0405637 0.0066961 0.0000928 0.0002705 0.0001596 0.0000040 0.0005642 0.0102951 0.0013844 0.0079818 0.0000093 0.0155205 0.0005003 0.0000493 0.0001487
## quartz_off_screen 
##                 2

Differential Gene Expression Analysis

Differential Gene Expression Analysis using DESeq2. Comparing parameters; dose, timepoint, prior infection status.

Volcano plots

Volcano plot for data exploratory analysis, filtering for significant values with False Discovery Rate < 0.05 and log fold change greater than 1.

# check only the negative fold change to see if there's a different pathway of negative fold changes

results_list <- list(
  base_pos_neg_d1,
  base_pos_neg_d2,
  base_pos_neg_d3,
  base_pos_d1_d2,
  base_pos_d1_d3,
  base_pos_d2_d3,
  base_neg_d1_d2,
  base_neg_d1_d3,
  base_neg_d2_d3,
  neg_d1,
  neg_d2,
  neg_d3_24,
  neg_d3_48,
  pos_d1,
  pos_d2,
  pos_d3_24,
  pos_d3_48,
  pos,
  neg,
  d1,
  d2,
  d3_24,
  d3_48
)

names(results_list) <- c(
  "Baseline Dose 1 Conv vs Naïve",
  "Baseline Dose 2 Conv vs Naïve",
  "Baseline Dose 3 Conv vs Naïve",
  "Baseline Conv Dose 1 and 2",
  "Baseline Conv Dose 1 and 3",
  "Baseline Conv Dose 2 and 3",
  "Baseline Naïve Dose 1 and 2",
  "Baseline Naïve Dose 1 and 3",
  "Baseline Naïve Dose 2 and 3",
  "Naïve Dose 1 0-24 hours",
  "Naïve Dose 2 0-24 hours",
  "Naïve Dose 3 0-24 hours",
  "Naïve Dose 3 0-48 hours",
  "Conv Dose 1 0-24 hours",
  "Conv Dose 2 0-24 hours",
  "Conv Dose 3 0-24 hours",
  "Conv Dose 3 0-48 hours",
  "Conv 0-24 hours",
  "Naïve 0-24 hours",
  "Dose 1 0-24 hours",
  "Dose 2 0-24 hours",
  "Dose 3 0-24 hours",
  "Dose 3 0-48 hours"
)

# Optional: uncomment line below to remove all comparisons except naive vs conv at each timepoint
# results_list <- results_list[10:17]

# filter up- and downregulated plots
results_list_plot <- lapply(results_list, function(x) {
  x <- x %>% mutate(
    test_padj = case_when(
      padj < 0.05 & log2FoldChange >= 1 ~ "Up regulated",
      padj < 0.05 & log2FoldChange <= -1 ~ "Down regulated",
      TRUE ~ "Not significant"
    ),
    test_pvalue = case_when(
      pvalue < 0.05 & log2FoldChange >= 1 ~ "Up regulated",
      pvalue < 0.05 & log2FoldChange <= -1 ~ "Down regulated",
      TRUE ~ "Not significant"
    ),
  )
})

# function to make volcano plots
volcano_plot <- function(x) {
  subsetted <- subset(
    results_list_plot[[x]] %>%
      tibble::rownames_to_column("gene_symbol"),
    abs(log2FoldChange) >= 1 & pvalue < 0.05
  )
  # count up regulated DEGs
  deg_number_up <- subset(
    results_list_plot[[x]] %>%
      tibble::rownames_to_column("gene_symbol"),
    log2FoldChange >= 1 & pvalue < 0.05
  ) %>%
    nrow()
  # count down regulated DEGs
  deg_number_down <- subset(
    results_list_plot[[x]] %>%
      tibble::rownames_to_column("gene_symbol"),
    log2FoldChange <= -1 & pvalue < 0.05
  ) %>%
    nrow()

  results_list_plot[[x]] %>%
    ggplot(aes(x = log2FoldChange, y = -log10(pvalue))) +
    geom_point(aes(colour = test_pvalue), size = 3, alpha = 0.3) +
    scale_color_manual(values = c(
      "Down regulated" = "blue",
      "Not significant" = "grey80",
      "Up regulated" = "red"
    )) +
    xlab(expression("Fold Change (Log"[2] * ")")) +
    ylab(expression("-Log"[10] * "(p-value)")) +
    labs(x = NULL, y = NULL) +
    geom_vline(xintercept = c(-1), linetype = "dotted", size = 1) +
    geom_vline(xintercept = c(1), linetype = "dotted", size = 1) +
    geom_hline(yintercept = -log10(0.05), linetype = "dotted", size = 1) +
    # geom_label_repel(data = subsetted, 
    # aes(log2FoldChange,-log10(pvalue),label= gene_symbol),
    # max.overlaps = 10) +
    xlim(-2, 6) +
    ylim(0, 20) +
    ggtitle(x) +
    annotate(
      geom = "text", colour = "red", size = 10, x = 4, y = 10, hjust = 0,
      label = paste0(deg_number_up)
    ) +
    annotate(
      geom = "text", colour = "blue", size = 10, x = -2, y = 10, hjust = 0,
      label = paste0(deg_number_down)
    ) +
    ggprism::theme_prism() +
    theme(axis.ticks = element_line(size = .5), legend.position = "none")
}

volcano_plots <- lapply(names(results_list_plot), volcano_plot)
names(volcano_plots) <- names(results_list_plot)
volcano_plots
## $`Baseline Dose 1 Conv vs Naïve`
## 
## $`Baseline Dose 2 Conv vs Naïve`
## 
## $`Baseline Dose 3 Conv vs Naïve`
## 
## $`Baseline Conv Dose 1 and 2`
## 
## $`Baseline Conv Dose 1 and 3`
## 
## $`Baseline Conv Dose 2 and 3`
## 
## $`Baseline Naïve Dose 1 and 2`
## 
## $`Baseline Naïve Dose 1 and 3`
## 
## $`Baseline Naïve Dose 2 and 3`
## 
## $`Naïve Dose 1 0-24 hours`

## 
## $`Naïve Dose 2 0-24 hours`

## 
## $`Naïve Dose 3 0-24 hours`

## 
## $`Naïve Dose 3 0-48 hours`

## 
## $`Conv Dose 1 0-24 hours`

## 
## $`Conv Dose 2 0-24 hours`

## 
## $`Conv Dose 3 0-24 hours`

## 
## $`Conv Dose 3 0-48 hours`

## 
## $`Conv 0-24 hours`
## 
## $`Naïve 0-24 hours`
## 
## $`Dose 1 0-24 hours`
## 
## $`Dose 2 0-24 hours`
## 
## $`Dose 3 0-24 hours`
## 
## $`Dose 3 0-48 hours`
## [[1]]
## [1] "../results/2022-08-05/figures/Baseline Dose 1 Conv vs Naïve_volcano_plot.png"
## 
## [[2]]
## [1] "../results/2022-08-05/figures/Baseline Dose 2 Conv vs Naïve_volcano_plot.png"
## 
## [[3]]
## [1] "../results/2022-08-05/figures/Baseline Dose 3 Conv vs Naïve_volcano_plot.png"
## 
## [[4]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 1 and 2_volcano_plot.png"
## 
## [[5]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 1 and 3_volcano_plot.png"
## 
## [[6]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 2 and 3_volcano_plot.png"
## 
## [[7]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 1 and 2_volcano_plot.png"
## 
## [[8]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 1 and 3_volcano_plot.png"
## 
## [[9]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 2 and 3_volcano_plot.png"
## 
## [[10]]
## [1] "../results/2022-08-05/figures/Naïve Dose 1 0-24 hours_volcano_plot.png"
## 
## [[11]]
## [1] "../results/2022-08-05/figures/Naïve Dose 2 0-24 hours_volcano_plot.png"
## 
## [[12]]
## [1] "../results/2022-08-05/figures/Naïve Dose 3 0-24 hours_volcano_plot.png"
## 
## [[13]]
## [1] "../results/2022-08-05/figures/Naïve Dose 3 0-48 hours_volcano_plot.png"
## 
## [[14]]
## [1] "../results/2022-08-05/figures/Conv Dose 1 0-24 hours_volcano_plot.png"
## 
## [[15]]
## [1] "../results/2022-08-05/figures/Conv Dose 2 0-24 hours_volcano_plot.png"
## 
## [[16]]
## [1] "../results/2022-08-05/figures/Conv Dose 3 0-24 hours_volcano_plot.png"
## 
## [[17]]
## [1] "../results/2022-08-05/figures/Conv Dose 3 0-48 hours_volcano_plot.png"
## 
## [[18]]
## [1] "../results/2022-08-05/figures/Conv 0-24 hours_volcano_plot.png"
## 
## [[19]]
## [1] "../results/2022-08-05/figures/Naïve 0-24 hours_volcano_plot.png"
## 
## [[20]]
## [1] "../results/2022-08-05/figures/Dose 1 0-24 hours_volcano_plot.png"
## 
## [[21]]
## [1] "../results/2022-08-05/figures/Dose 2 0-24 hours_volcano_plot.png"
## 
## [[22]]
## [1] "../results/2022-08-05/figures/Dose 3 0-24 hours_volcano_plot.png"
## 
## [[23]]
## [1] "../results/2022-08-05/figures/Dose 3 0-48 hours_volcano_plot.png"

Enrichment analysis

Selecting two different databases for search for enrichment analysis. The databases selected were “GO_Biological_Process_2021” and “KEGG_2021_Human”.

Also testing the Blood Transcriptome modules presented in Li et al. 2014 using some GSEA package.

Over-Representation Analysis

Over-representation looks for biological pathways that are enriched more than would be expected by chance. It only takes the genes that were found to be differentially expressed into account.

# Remove items without significant results from list
enrichment_list_up[sapply(enrichment_list_up, is.null)] <- NULL
enrichment_list_down[sapply(enrichment_list_down, is.null)] <- NULL

# Optional: uncomment two lines below to remove all comparisons except naive vs conv at each timepoint
# enrichment_list_up <- enrichment_list_up[3:9]
# enrichment_list_down <- enrichment_list_down[3:6]

# function for plotting over-representation analysis
plot_enrichment <- function(enrichment_list) {
  plots_enrich <- list()
  for (pathway in 1:length(enrichment_list[[1]])) {
    rbinded_enrich_list <- lapply(enrichment_list, function(x) x[[pathway]])
    print(names(enrichment_list[[1]])[[pathway]])
    rbinded_enrich_df <- data.table::rbindlist(l = rbinded_enrich_list, idcol = TRUE, fill = TRUE)

    plots_enrich[[pathway]] <- rbinded_enrich_df %>%
      group_by(.id) %>%
      arrange(Adjusted.P.value) %>%
      slice(1:10)
  }
  plots <- lapply(plots_enrich, function(x) {
    x %>%
      ggplot(aes(x = Term, y = -log10(Adjusted.P.value))) +
      geom_bar(stat = "identity", width = 0.05) +
      geom_point(size = 3) +
      theme_minimal() +
      theme(
        text = element_text(size = 10),
        plot.title = element_text(hjust = (5 / nchar(results_list)) * 2),
        plot.margin = margin(t = 5, r = 50, b = 5, unit = "pt"),
        axis.text.y = element_text(size = 8)
      ) +
      coord_flip() +
      geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray") +
      theme_bw() +
      labs(x = NULL, y = expression("Adjusted P value, Log"[10] * "")) +
      facet_wrap(~.id, ncol = 4)
  })
  return(plots)
}

# Plotting over-representation analysis
enrichment_plot_up <- plot_enrichment(enrichment_list_up)
enrichment_plot_down <- plot_enrichment(enrichment_list_down)

names(enrichment_plot_up) <- c(
  "GO_Biological_Process_2021_up",
  "KEGG_2021_Human_up",
  "GO_Molecular_Function_2021_up"
)

names(enrichment_plot_down) <- c(
  "GO_Biological_Process_2021_down",
  "KEGG_2021_Human_down",
  "GO_Molecular_Function_2021_down"
)
print(enrichment_plot_up)

print(enrichment_plot_down)

## [[1]]
## [1] "../results/2022-08-05/figures/GO_Biological_Process_2021_up_EA.png"
## 
## [[2]]
## [1] "../results/2022-08-05/figures/KEGG_2021_Human_up_EA.png"
## 
## [[3]]
## [1] "../results/2022-08-05/figures/GO_Molecular_Function_2021_up_EA.png"
## [[1]]
## [1] "../results/2022-08-05/figures/GO_Biological_Process_2021_down_EA.png"
## 
## [[2]]
## [1] "../results/2022-08-05/figures/KEGG_2021_Human_down_EA.png"
## 
## [[3]]
## [1] "../results/2022-08-05/figures/GO_Molecular_Function_2021_down_EA.png"

Gene Set Enrichment Analysis

GSEA looks for patterns of enriched genes which are related to each other. Compared with over-representation analysis this does not rely solely on DEGs. Therefore, this method is better when the differences in gene expression is smaller.

Annotations

Prepare Input

process_to_gsea <- function(x) {
  original_gene_list <- x$log2FoldChange
  names(original_gene_list) <- rownames(x)
  gene_list <- na.omit(original_gene_list)
  gene_list <- sort(gene_list, decreasing = TRUE)
}

ls_processed <- lapply(results_list, process_to_gsea)

Gene Set Enrichment

Ridgeplot

Helpful to interpret up/down-regulated pathways.

# Optional: uncomment line below to remove all comparisons except naive vs conv at each timepoint
# list_gse <- list_gse[10:17]


ridgeplotting <- function(x) {
  list_gse[[x]] %>%
    ridgeplot(fill = "enrichmentScore") +
    viridis::scale_fill_viridis(option = "turbo") +
    ggtitle(x) +
    cowplot::theme_cowplot(
      font_size = 6,
      line_size = 1.5,
      rel_small = 1.6,
      rel_tiny = 1,
      font_family = "Arial",
      rel_large = 4
    ) +
    theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.key.size = unit(1, "cm"),
      legend.title = element_text(size = 16, face = "bold", family = "Arial"),
      legend.text = element_text(size = 14),
      axis.title = element_text(size = 35),
      axis.text.x = element_text(size = 16),
      axis.text.y = element_text(size = 16)
    ) +
    scale_y_discrete()
}

ridgeplots <- lapply(names(list_gse), ridgeplotting)
names(ridgeplots) <- names(list_gse)
print(ridgeplots)
## $`Baseline Dose 1 Conv vs Naïve`
## 
## $`Baseline Dose 2 Conv vs Naïve`
## 
## $`Baseline Dose 3 Conv vs Naïve`
## 
## $`Baseline Conv Dose 1 and 2`
## 
## $`Baseline Conv Dose 1 and 3`
## 
## $`Baseline Conv Dose 2 and 3`
## 
## $`Baseline Naïve Dose 1 and 2`
## 
## $`Baseline Naïve Dose 1 and 3`
## 
## $`Baseline Naïve Dose 2 and 3`
## 
## $`Naïve Dose 1 0-24 hours`

## 
## $`Naïve Dose 2 0-24 hours`

## 
## $`Naïve Dose 3 0-24 hours`

## 
## $`Naïve Dose 3 0-48 hours`

## 
## $`Conv Dose 1 0-24 hours`

## 
## $`Conv Dose 2 0-24 hours`

## 
## $`Conv Dose 3 0-24 hours`

## 
## $`Conv Dose 3 0-48 hours`

## 
## $`Conv 0-24 hours`
## 
## $`Naïve 0-24 hours`
## 
## $`Dose 1 0-24 hours`
## 
## $`Dose 2 0-24 hours`
## 
## $`Dose 3 0-24 hours`
## 
## $`Dose 3 0-48 hours`
## [[1]]
## [1] "../results/2022-08-05/figures/Baseline Dose 1 Conv vs Naïve_BTM_plot.png"
## 
## [[2]]
## [1] "../results/2022-08-05/figures/Baseline Dose 2 Conv vs Naïve_BTM_plot.png"
## 
## [[3]]
## [1] "../results/2022-08-05/figures/Baseline Dose 3 Conv vs Naïve_BTM_plot.png"
## 
## [[4]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 1 and 2_BTM_plot.png"
## 
## [[5]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 1 and 3_BTM_plot.png"
## 
## [[6]]
## [1] "../results/2022-08-05/figures/Baseline Conv Dose 2 and 3_BTM_plot.png"
## 
## [[7]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 1 and 2_BTM_plot.png"
## 
## [[8]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 1 and 3_BTM_plot.png"
## 
## [[9]]
## [1] "../results/2022-08-05/figures/Baseline Naïve Dose 2 and 3_BTM_plot.png"
## 
## [[10]]
## [1] "../results/2022-08-05/figures/Naïve Dose 1 0-24 hours_BTM_plot.png"
## 
## [[11]]
## [1] "../results/2022-08-05/figures/Naïve Dose 2 0-24 hours_BTM_plot.png"
## 
## [[12]]
## [1] "../results/2022-08-05/figures/Naïve Dose 3 0-24 hours_BTM_plot.png"
## 
## [[13]]
## [1] "../results/2022-08-05/figures/Naïve Dose 3 0-48 hours_BTM_plot.png"
## 
## [[14]]
## [1] "../results/2022-08-05/figures/Conv Dose 1 0-24 hours_BTM_plot.png"
## 
## [[15]]
## [1] "../results/2022-08-05/figures/Conv Dose 2 0-24 hours_BTM_plot.png"
## 
## [[16]]
## [1] "../results/2022-08-05/figures/Conv Dose 3 0-24 hours_BTM_plot.png"
## 
## [[17]]
## [1] "../results/2022-08-05/figures/Conv Dose 3 0-48 hours_BTM_plot.png"
## 
## [[18]]
## [1] "../results/2022-08-05/figures/Conv 0-24 hours_BTM_plot.png"
## 
## [[19]]
## [1] "../results/2022-08-05/figures/Naïve 0-24 hours_BTM_plot.png"
## 
## [[20]]
## [1] "../results/2022-08-05/figures/Dose 1 0-24 hours_BTM_plot.png"
## 
## [[21]]
## [1] "../results/2022-08-05/figures/Dose 2 0-24 hours_BTM_plot.png"
## 
## [[22]]
## [1] "../results/2022-08-05/figures/Dose 3 0-24 hours_BTM_plot.png"
## 
## [[23]]
## [1] "../results/2022-08-05/figures/Dose 3 0-48 hours_BTM_plot.png"

Combined GSEA heatmap

Using the Blood Transcriptome modules presented in Li et al. 2014.

my_palette <- colorRampPalette(c("#265891", "white", "#B80F20"))(n = 20)
sel_list_gse <- list_gse[c(
  "Naïve Dose 1 0-24 hours",
  "Naïve Dose 2 0-24 hours",
  "Naïve Dose 3 0-24 hours",
  "Naïve Dose 3 0-48 hours",
  "Conv Dose 1 0-24 hours",
  "Conv Dose 2 0-24 hours",
  "Conv Dose 3 0-24 hours",
  "Conv Dose 3 0-48 hours"
)]

combined_gsea <- data.table::rbindlist(lapply(sel_list_gse, as.data.frame), idcol = TRUE) %>%
  group_by(.id) %>%
  arrange(desc(NES)) %>%
  ungroup()

to_heatmap <- combined_gsea %>%
  select(.id, ID, NES) %>%
  filter(NES > 2 | NES < -2, !grepl("TBA", ID)) %>%
  tidyr::pivot_wider(values_from = NES, names_from = ID) %>%
  mutate(across(where(anyNA), ~ tidyr::replace_na(., 0))) %>%
  t()
colnames(to_heatmap) <- to_heatmap[1, ]
to_heatmap <- to_heatmap[-1, ]

col_order <- c(
  "Naïve Dose 1 0-24 hours",
  "Conv Dose 1 0-24 hours",
  "Naïve Dose 2 0-24 hours",
  "Conv Dose 2 0-24 hours",
  "Naïve Dose 3 0-24 hours",
  "Conv Dose 3 0-24 hours",
  "Naïve Dose 3 0-48 hours",
  "Conv Dose 3 0-48 hours"
)

to_heatmap <- to_heatmap[, col_order]

BTM_heatmap <- matrix(as.numeric(to_heatmap), ncol = ncol(to_heatmap)) %>%
    pheatmap::pheatmap(
      cellwidth = 20,
      cellheight = 5,
      cluster_rows = TRUE,
      cluster_cols = FALSE,
      clustering_method = "ward.D2",
      angle_col = 45,
      border_color = "black",
      cutree_cols = 4,
      color = my_palette,
      labels_col = colnames(to_heatmap),
      labels_row = rownames(to_heatmap),
      treeheight_col = 1,
      fontsize_row = 6,
      fontsize_col = 8,
      gaps_col = c(2, 4, 6)
    )
BTM_heatmap

Venn Diagram of DEGs

extract_degs_names <- function(x) {
  subsetted <- results_list_plot[[x]] %>%
    tibble::rownames_to_column("gene_symbol") %>%
    filter(log2FoldChange > 1 & padj < 0.05) %>%
    select(gene_symbol, log2FoldChange)
  return(subsetted)
}

degs_names <- lapply(names(results_list_plot), extract_degs_names)
names(degs_names) <- names(results_list_plot)
to_venn <- list(
  conv = degs_names[["Conv 0-24 hours"]][[1]],
  naive = degs_names[["Naïve 0-24 hours"]][[1]]
)

ggVennDiagram(to_venn, show_intersect = TRUE)